# Run the project's import helper for the "jake_gyllenhaal" dataset, load the
# imported data into `filmes`, and print a compact per-column summary.
import_data("jake_gyllenhaal")
filmes <- read_imported_data()
glimpse(filmes)
Observations: 20
Variables: 5
$ avaliacao <int> 92, 68, 73, 52, 73, 59, 82, 85, 92, 49, 35, 64, 47, 90, 87, 61, 62, 44, ...
$ filme <chr> "Stronger", "Life", "Nocturnal Animals", "Demolition", "Everest", "South...
$ papel <chr> "Jeff Bauman", "David Jordan", "Tony HastingsEdward Sheffield", "Davis M...
$ bilheteria <dbl> 4.2, 30.2, 10.7, 1.7, 46.6, 42.4, 61.0, 39.1, 54.7, 33.3, 90.8, 28.6, 9....
$ ano <int> 2017, 2017, 2016, 2016, 2015, 2015, 2013, 2012, 2011, 2010, 2010, 2009, ...
Data Overview
Bilheteria
# Scatter plot of box office (bilheteria) against year (ano).
ggplot(filmes, aes(x = ano, y = bilheteria)) +
  geom_point(size = 4, color = paleta[1])

# Histogram of box office values in bins of width 10 aligned at 0, with a rug
# underneath marking each individual observation.
ggplot(filmes, aes(x = bilheteria)) +
  geom_histogram(
    binwidth = 10,
    boundary = 0,
    fill = "grey",
    color = "black"
  ) +
  geom_rug(size = 0.5) +
  scale_x_continuous(breaks = seq(0, 210, by = 10))

# Q-Q plot of the box office values (stat_qq() with its default distribution).
# The data is passed ungrouped: a dplyr group_by() does not alter how ggplot2
# maps aesthetics, so the previous grouping by film had no effect on the plot
# and only suggested a per-film computation that never happened.
ggplot(filmes, aes(sample = bilheteria)) +
  stat_qq()

# Interactive strip plot of box office values. The custom `text` aesthetic
# carries the film title and box office, and is the only field shown in the
# plotly tooltip.
p <- ggplot(
  filmes,
  aes(
    x = "",
    y = bilheteria,
    label = filme,
    text = paste("Filme:", filme, "\nBilheteria:", bilheteria, "m")
  )
) +
  geom_jitter(width = 0.05, alpha = 0.3, size = 3) +
  labs(x = "")

ggplotly(p, tooltip = "text") %>%
  layout(autosize = FALSE)
Avaliação
# Scatter plot of rating (avaliacao) against year (ano), with the y axis
# fixed to the 0-100 range.
ggplot(filmes, aes(x = ano, y = avaliacao)) +
  geom_point(size = 4, color = paleta[1]) +
  scale_y_continuous(limits = c(0, 100))

# Histogram of ratings in bins of width 10 aligned at 0, with a rug
# underneath marking each individual observation.
ggplot(filmes, aes(x = avaliacao)) +
  geom_histogram(
    binwidth = 10,
    boundary = 0,
    fill = paleta[3],
    color = "black"
  ) +
  geom_rug(size = 0.5)

# Q-Q plot of the ratings (stat_qq() with its default distribution).
# The data is passed ungrouped: a dplyr group_by() does not alter how ggplot2
# maps aesthetics, so the previous grouping by film had no effect on the plot
# and only suggested a per-film computation that never happened.
ggplot(filmes, aes(sample = avaliacao)) +
  stat_qq()

# Interactive strip plot of ratings. The custom `text` aesthetic carries the
# film title and rating, and is the only field shown in the plotly tooltip.
p <- ggplot(
  filmes,
  aes(
    x = "",
    y = avaliacao,
    text = paste("Filme:", filme, "\nAvaliação:", avaliacao)
  )
) +
  geom_jitter(width = 0.05, alpha = 0.3, size = 3) +
  labs(x = "")

ggplotly(p, tooltip = "text") %>%
  layout(autosize = FALSE)
Agrupamento hierárquico
Uma dimensão
# Hierarchical clustering of the films on the rating alone: Euclidean
# distances between ratings, merged with the "ward.D" linkage. Film titles are
# moved to row names so that they label the dendrogram leaves.
agrupamento_h <- filmes %>%
  # Plain data frame first: setting row names on a tibble is deprecated.
  as.data.frame() %>%
  column_to_rownames("filme") %>%
  select(avaliacao) %>%
  dist(method = "euclidean") %>%
  hclust(method = "ward.D")

# Horizontal dendrogram of the one-dimensional clustering.
ggdendrogram(agrupamento_h, rotate = TRUE, size = 2, theme_dendro = FALSE) +
  labs(y = "Dissimilaridade", x = "", title = "Dendrograma")

# Cut a hierarchical clustering into the requested number(s) of groups and
# return the assignments in long format.
#
# agrupamento: clustering object accepted by cutree().
# num_grupos:  number(s) of groups, forwarded to cutree().
#
# Returns a data frame with the columns `label` (row name of each clustered
# item), `k` (name of the cutree() column the assignment came from) and
# `grupo` (the assigned group, converted to character).
get_grupos <- function(agrupamento, num_grupos) {
  cortes <- cutree(agrupamento, num_grupos) %>%
    as.data.frame()
  # Keep the item names as a regular column; gather() discards row names.
  cortes <- mutate(cortes, label = rownames(cortes))
  longo <- gather(cortes, key = "k", value = "grupo", -label)
  mutate(longo, grupo = as.character(grupo))
}
# Group assignments for 1 to 6 groups, joined back to the film data by title.
atribuicoes <- get_grupos(agrupamento_h, num_grupos = 1:6) %>%
  left_join(filmes, by = c("label" = "filme"))

# One facet per number of groups, with each film's rating coloured by the
# group it was assigned to.
ggplot(atribuicoes, aes(x = "Filmes", y = avaliacao, colour = grupo)) +
  geom_jitter(width = 0.02, height = 0, size = 1.6, alpha = 0.6) +
  facet_wrap(~ paste(k, " grupos")) +
  scale_color_brewer(palette = "Dark2")

# Number of groups selected for the detailed view below.
k_escolhido <- 3

# Interactive plot of the selected solution: films ordered by rating on the
# flipped axis, coloured by group, with title, rating and group in the
# tooltip.
p <- atribuicoes %>%
  filter(k == k_escolhido) %>%
  ggplot(
    aes(
      x = reorder(label, avaliacao),
      y = avaliacao,
      colour = grupo,
      text = paste(
        "Filme:", reorder(label, avaliacao),
        "\nAvaliação:", avaliacao,
        "\nGrupo:", grupo
      )
    )
  ) +
  geom_jitter(width = 0.02, height = 0, size = 3, alpha = 0.6) +
  facet_wrap(~ paste(k, " grupos")) +
  scale_color_brewer(palette = "Dark2") +
  labs(x = "", y = "Avaliação RT") +
  coord_flip()

ggplotly(p, tooltip = "text") %>%
  layout(autosize = FALSE)
Com duas dimensões
# Hierarchical clustering on two dimensions: rating and log10 box office.
# Both columns are standardised with scale() before the Euclidean distances
# are computed, and the tree is built with the "ward.D" linkage.
agrupamento_h_2d <- filmes %>%
  mutate(bilheteria = log10(bilheteria)) %>%
  # Pass the function directly; the funs() wrapper is deprecated in dplyr.
  mutate_at(vars("avaliacao", "bilheteria"), scale) %>%
  # Convert to a plain data frame before moving titles to row names: setting
  # row names on a tibble is deprecated and raises a warning.
  as.data.frame() %>%
  column_to_rownames("filme") %>%
  select("avaliacao", "bilheteria") %>%
  dist(method = "euclidean") %>%
  hclust(method = "ward.D")
Setting row names on a tibble is deprecated.
# Horizontal dendrogram of the two-dimensional clustering.
ggdendrogram(
  agrupamento_h_2d,
  rotate = TRUE,
  theme_dendro = FALSE
)

# Copy of the film data with box office replaced by its log10, handed to the
# project helper plota_hclusts_2d() together with the 2-D clustering.
# NOTE(review): bilheteria in filmes2 is already log10-transformed, and
# scale_y_log10() adds a log scale on top. If the helper maps bilheteria to
# the y axis this is a double log transformation; confirm it is intended.
filmes2 <- mutate(filmes, bilheteria = log10(bilheteria))

plota_hclusts_2d(
  agrupamento_h_2d,
  filmes2,
  c("avaliacao", "bilheteria"),
  linkage_method = "ward.D",
  ks = 1:6,
  palette = "Dark2"
) +
  scale_y_log10()

# Assignments of the 2-D clustering for the 3-group solution, joined to the
# original (untransformed) film data by title.
atribuicoes <- get_grupos(agrupamento_h_2d, num_grupos = 1:6) %>%
  filter(k == 3) %>%
  mutate(filme = label) %>%
  left_join(filmes, by = "filme")

# Interactive scatter of rating against box office (log10 y axis), coloured
# by group, with title, box office and rating in the tooltip.
p <- ggplot(
  atribuicoes,
  aes(
    x = avaliacao,
    y = bilheteria,
    colour = grupo,
    text = paste(
      "Filme:", filme,
      "\nBilheteria:", bilheteria, "m\n",
      "Avaliação:", avaliacao
    )
  )
) +
  geom_jitter(width = 0.02, height = 0, size = 3, alpha = 0.6) +
  facet_wrap(~ paste(k, " grupos")) +
  scale_color_brewer(palette = "Dark2") +
  scale_y_log10()

ggplotly(p, tooltip = "text") %>%
  layout(autosize = FALSE)
LS0tCnRpdGxlOiAiVGlwb3MgZGUgZmlsbWUgZGUgSmFrZSBHeWxsZW5oYWFsIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZ2dkZW5kcm8pCgpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9saWIuUiIpKQpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9wbG90YV9zb2x1Y29lc19oY2x1c3QuUiIpKQoKdGhlbWVfc2V0KHRoZW1lX3JlcG9ydCgpKQoKa25pdHI6Om9wdHNfY2h1bmskc2V0KHRpZHkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDYsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0ID0gNSwKICAgICAgICAgICAgICAgICAgICAgIGVjaG8gPSBUUlVFKQpwYWxldGEgPSBjKCIjNDA0RTREIiwKICAgICAgICAgICAiIzkyRENFNSIsCiAgICAgICAgICAgIiM5MzhCQTEiLAogICAgICAgICAgICIjMkQzMTQyIiwKICAgICAgICAgICAiI0Y0NzQzQiIpCmBgYAoKYGBge3IgcmVhZCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KaW1wb3J0X2RhdGEoImpha2VfZ3lsbGVuaGFhbCIpIApmaWxtZXMgPC0gcmVhZF9pbXBvcnRlZF9kYXRhKCkKZmlsbWVzICU+JSAKICAgIGdsaW1wc2UoKQpgYGAKCgojIyBEYXRhIE92ZXJ2aWV3CgojIyMgQmlsaGV0ZXJpYQoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBiaWxoZXRlcmlhKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAKYGBgCgoKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYmlsaGV0ZXJpYSkpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgwLDIxMCwxMCkpCmBgYAoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBncm91cF9ieShmaWxtZSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1iaWxoZXRlcmlhKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KcCA8LSBmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gIiIsCiAgICAgICAgICAgICAgIHkgPSBiaWxoZXRlcmlhLAogICAgICAgICAgICAgICBsYWJlbCA9IGZpbG1lLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoIkZpbG1lOiIsZmlsbWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAiXG5CaWxoZXRlcmlh
OiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBiaWxoZXRlcmlhLCJtIikpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwLCB0b29sdGlwPSJ0ZXh0IikgJT4lIAogICAgbGF5b3V0KGF1dG9zaXplID0gRikKYGBgCgojIyMgQXZhbGlhw6fDo28KCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYW5vLCB5ID0gYXZhbGlhY2FvKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoMCwgMTAwKSkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdncGxvdChhZXMoeCA9IGF2YWxpYWNhbykpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9IHBhbGV0YVszXSwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSAKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdyb3VwX2J5KGZpbG1lKSAlPiUKICAgIGdncGxvdChhZXMoc2FtcGxlPWF2YWxpYWNhbykpICsgCiAgICBzdGF0X3FxKCkgCmBgYAoKYGBge3J9CnAgPC0gZmlsbWVzICU+JSAKICAgIGdncGxvdChhZXMoeCA9ICIiLAogICAgICAgICAgICAgICB5ID0gYXZhbGlhY2FvLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoCiAgICAgICAgICAgICAgICAgICAgIkZpbG1lOiIsZmlsbWUsCiAgICAgICAgICAgICAgICAgICAgIlxuQXZhbGlhw6fDo286IixhdmFsaWFjYW8pKSkgKyAKICAgIGdlb21faml0dGVyKHdpZHRoID0gLjA1LCBhbHBoYSA9IC4zLCBzaXplID0gMykgKyAKICAgIGxhYnMoeCA9ICIiKQoKZ2dwbG90bHkocCwgdG9vbHRpcCA9ICJ0ZXh0IikgJT4lIAogICAgbGF5b3V0KGF1dG9zaXplID0gRikKCmBgYAoKIyMgQWdydXBhbWVudG8gaGllcsOhcnF1aWNvCgojIyMgVW1hIGRpbWVuc8OjbwoKYGBge3J9CmFncnVwYW1lbnRvX2ggPSBmaWxtZXMgJT4lIAogICAgbXV0YXRlKG5vbWUgPSBwYXN0ZTAoZmlsbWUsICIgKGF2PSIsIGF2YWxpYWNhbywgIikiKSkgJT4lIAogICAgYXMuZGF0YS5mcmFtZSgpICU+JSAKICAgIGNvbHVtbl90b19yb3duYW1lcygiZmlsbWUiKSAlPiUgCiAgICBzZWxlY3QoYXZhbGlhY2FvKSAlPiUKICAgIGRpc3QobWV0aG9kID0gImV1Y2xpZGlhbiIpICU+JSAKICAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikKCmdnZGVuZHJvZ3JhbShhZ3J1cGFtZW50b19oLCByb3RhdGUgPSBULCBzaXplID0gMiwgdGhlbWVfZGVuZHJvID0gRikgKyAKICAgIGxhYnMoeSA9ICJEaXNzaW1pbGFyaWRhZGUiLCB4ID0gIiIsIHRpdGxlID0gIkRlbmRyb2dyYW1hIikKYGBgCgpgYGB7cn0KZ2V0X2dydXBvcyA8LSBmdW5jdGlvbihhZ3J1cGFtZW50bywgbnVtX2dydXBvcyl7CiAgICBhZ3J1cGFtZW50byAlPiUg
CiAgICAgICAgY3V0cmVlKG51bV9ncnVwb3MpICU+JSAKICAgICAgICBhcy5kYXRhLmZyYW1lKCkgJT4lIAogICAgICAgIG11dGF0ZShsYWJlbCA9IHJvd25hbWVzKC4pKSAlPiUgCiAgICAgICAgZ2F0aGVyKGtleSA9ICAiayIsIHZhbHVlID0gImdydXBvIiwgLWxhYmVsKSAlPiUgCiAgICAgICAgbXV0YXRlKGdydXBvID0gYXMuY2hhcmFjdGVyKGdydXBvKSkKfQoKYXRyaWJ1aWNvZXMgPSBnZXRfZ3J1cG9zKGFncnVwYW1lbnRvX2gsIG51bV9ncnVwb3MgPSAxOjYpCgphdHJpYnVpY29lcyA9IGF0cmlidWljb2VzICU+JSAKICAgIGxlZnRfam9pbihmaWxtZXMsIGJ5ID0gYygibGFiZWwiID0gImZpbG1lIikpCgphdHJpYnVpY29lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiRmlsbWVzIiwgeSA9IGF2YWxpYWNhbywgY29sb3VyID0gZ3J1cG8pKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAxLjYsIGFscGhhID0gLjYpICsgCiAgICBmYWNldF93cmFwKH4gcGFzdGUoaywgIiBncnVwb3MiKSkgKyAKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikKCmBgYAoKYGBge3J9CmtfZXNjb2xoaWRvID0gMwoKcCA8LWF0cmlidWljb2VzICU+JSAKICAgIGZpbHRlcihrID09IGtfZXNjb2xoaWRvKSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSByZW9yZGVyKGxhYmVsLCBhdmFsaWFjYW8pLAogICAgICAgICAgICAgICB5ID0gYXZhbGlhY2FvLAogICAgICAgICAgICAgICBjb2xvdXIgPSBncnVwbywKICAgICAgICAgICAgICAgdGV4dCA9IHBhc3RlKAogICAgICAgICAgICAgICAgICAgICJGaWxtZToiLCByZW9yZGVyKGxhYmVsLCBhdmFsaWFjYW8pLAogICAgICAgICAgICAgICAgICAgICJcbkF2YWxpYcOnw6NvOiIsIGF2YWxpYWNhbywKICAgICAgICAgICAgICAgICAgICAiXG5HcnVwbzoiLCBncnVwbykpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAzLCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpICsgCiAgICBsYWJzKHggPSAiIiwgeSA9ICJBdmFsaWHDp8OjbyBSVCIpICsgCiAgICBjb29yZF9mbGlwKCkKCmdncGxvdGx5KHAsdG9vbHRpcCA9ICJ0ZXh0IikgJT4lCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQoKYGBgCgojIyMgQ29tIGR1YXMgZGltZW5zw7VlcwoKYGBge3J9CmFncnVwYW1lbnRvX2hfMmQgPSBmaWxtZXMgJT4lCiAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpICU+JQogICBtdXRhdGVfYXQodmFycygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwgZnVucyhzY2FsZSkpICU+JQogICBjb2x1bW5fdG9fcm93bmFtZXMoImZpbG1lIikgJT4lCiAgIHNlbGVjdCgiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSAlPiUKICAgZGlzdChtZXRob2QgPSAiZXVjbGlkZWFu
IikgJT4lCiAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikKCmdnZGVuZHJvZ3JhbShhZ3J1cGFtZW50b19oXzJkLCByb3RhdGUgPSBUUlVFLCB0aGVtZV9kZW5kcm8gPSBGKQpgYGAKCmBgYHtyfQpmaWxtZXMyIDwtIGZpbG1lcyAlPiUKICAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpCgpwbG90YV9oY2x1c3RzXzJkKGFncnVwYW1lbnRvX2hfMmQsCiAgICAgICAgICAgICAgICBmaWxtZXMyLAogICAgICAgICAgICAgICAgYygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwKICAgICAgICAgICAgICAgIGxpbmthZ2VfbWV0aG9kID0gIndhcmQuRCIsIAogICAgICAgICAgICAgICAga3MgPSAxOjYsCiAgICAgICAgICAgICAgICBwYWxldHRlID0gIkRhcmsyIikgKyAKICAgIHNjYWxlX3lfbG9nMTAoKQpgYGAKCmBgYHtyfQphdHJpYnVpY29lcyA9IGdldF9ncnVwb3MoYWdydXBhbWVudG9faF8yZCwgbnVtX2dydXBvcyA9IDE6NikKCmF0cmlidWljb2VzID0gYXRyaWJ1aWNvZXMgJT4lIAogICAgZmlsdGVyKGsgPT0gMykgJT4lCiAgICBtdXRhdGUoZmlsbWUgPSBsYWJlbCkgJT4lIAogICAgbGVmdF9qb2luKGZpbG1lcywgYnkgPSAiZmlsbWUiKQoKcCA8LSBhdHJpYnVpY29lcyAlPiUKICAgIGdncGxvdChhZXMoeCA9IGF2YWxpYWNhbywKICAgICAgICAgICAgICAgeSA9IGJpbGhldGVyaWEsCiAgICAgICAgICAgICAgIGNvbG91ciA9IGdydXBvLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoCiAgICAgICAgICAgICAgICAgICAgIkZpbG1lOiIsIGZpbG1lLAogICAgICAgICAgICAgICAgICAgICJcbkJpbGhldGVyaWE6IiwgYmlsaGV0ZXJpYSwibVxuIiwKICAgICAgICAgICAgICAgICAgICAiQXZhbGlhw6fDo286IiwgYXZhbGlhY2FvKSkpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wMiwgaGVpZ2h0ID0gMCwgc2l6ZSA9IDMsIGFscGhhID0gLjYpICsgCiAgICBmYWNldF93cmFwKH4gcGFzdGUoaywgIiBncnVwb3MiKSkgKyAKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikgKwogICAgc2NhbGVfeV9sb2cxMCgpCgpnZ3Bsb3RseShwLCB0b29sdGlwID0gInRleHQiKSAlPiUKICAgIGxheW91dChhdXRvc2l6ZSA9IEYpCmBgYAo=